*******************************************************

/*  Fishing in troubled water: The Impact of US-China Trade War on Vietnam
 Pham Phuong Ngoc and Dainn Wie
 
 This is do file to construct tariff different variables and combine with VES data.

Inputs:

1. HS2012.dta 
2. import_data.dta 
3. China_target_hs8.dta 
4. China_target_hs10.dta 
5. VES.dta 
6. PPI_Vietnam.xlsx 
7. Vietnamtoworld.dta 

*/

clear all
capture log close
set more off

cd "C:\Replication"

********************************************************************************
*** PART 1: Construct tariff change in 2018 and 2019 
********************************************************************************

* Open the trade import value data of US from all countries
use "Data\import_data.dta", clear

rename m_val0 cifvalue_world
rename m_val1 cifvalue_china
drop if (cifvalue_world==. & cifvalue_china==.)

keep if year==2015

gen hs4=int(hs6/100)
label var hs4 "HS4 Code"

order hs10 hs8 hs6 hs4 year

* Convert HS 6 digit code to ISIC 
merge m:1 hs6 using "Data\HS2012.dta", keep (match master) keepusing (isic3) generate (merge1)

replace isic3 =. if isic3==1| isic3==3 | isic3==26 | isic3==27 | merge1==1
count if isic3==.

drop merge1

preserve
egen totalvalue = sum (cifvalue_world)
egen value = sum (cifvalue_world) if isic3==.
generate a = value/totalvalue
sum a

egen totalvalue1 = sum(cifvalue_china)
egen value1 = sum (cifvalue_china) if isic3==.
generate b = value1/totalvalue1
sum b
restore

* I will drop the product code which cannot convert to ISIC 3 digit code 
drop if isic3==.

* generate variable for total value of US import in each industry code
egen value = sum (cifvalue_world), by (isic3)

* generate import value weight of each 10-digit HS code within 3 digit industries using China export data
generate weight=cifvalue_china/value
label variable weight "Weight is determined by china import in each hs over total US import of industry"

* merge to get the target products in US - China trade war
merge m:1 hs8 using "Data\China_target_hs8.dta", keep (match master) generate (merge2)
merge m:1 hs10 using "Data\China_target_hs10.dta", keep (match master) generate (merge3)
replace heading=heading1 if merge3==3
drop heading1

* generate dummy variable for product subject to US solar tariff, washer tariff, alum tariff and steel tariff
gen hit=0
replace hit=1 if hs8==85414060 | hs8==85013180 | hs8==85016100 | hs8==85072080
replace hit=1 if hs8==84501100 | hs8== 84502000 | hs8==84509020 | hs8==84509060
replace hit=1 if hs4==7601 | hs4==7604 | hs4==7605 | hs4==7606 | hs4==7607 | hs4==7608 | hs4==7609 | hs10==7616995160 | hs10==7616995170
replace hit=1 if (hs6>=720610 & hs6<=721650) | (hs6>=721699 & hs6<=730110) | hs6==730210 | (hs6>=730240 & hs6<=730290) | (hs6 >= 730410 & hs6<= 730690)

label var hit "Product suject to US solar, washer, alum and steel tariff"

* generate the average US import tariff difference between Vietnam and China in 2018 and 2019
generate deta_tariff2018=0
replace deta_tariff2018=((25*0.25/31 + 0.25*5)/12) if heading==99038801
replace deta_tariff2018=((8*0.25/31 + 0.25*4)/12) if heading==99038802
replace deta_tariff2018=((6*0.10/30 + 0.10*3)/12) if heading==99038803 | heading==99038804
replace deta_tariff2018=0 if hit==1
tab deta_tariff2018
sum deta_tariff2018 if deta_tariff2018!=0

generate deta_tariff2019=0
replace deta_tariff2019=0.25 if heading==99038801 | heading==99038802
replace deta_tariff2019=((0.10*4+0.10*10/31+21*0.25/31+0.25*7)/12) if heading==99038803| heading==99038804
replace deta_tariff2019=(0.15*4/12) if heading==99038815
replace deta_tariff2019=0 if hit==1
tab deta_tariff2019
sum deta_tariff2019 if deta_tariff2019!=0

* interact the US import tariff difference with the weights of china import in each hs over total US import of industry
generate wt_tariff2018 = deta_tariff2018 * weight
generate wt_tariff2019 = deta_tariff2019 * weight

* interact the US import tariff difference with the trade volume of Chinese products
generate vol_tariff2018 = deta_tariff2018 * cifvalue_china
generate vol_tariff2019 = deta_tariff2019 * cifvalue_china

* collapse to get average US import tariff difference between Vietnam and China in 2018 and 2019 in each 3-digit ISIC industry code
collapse (sum) deta_tariff2018 deta_tariff2019 wt_tariff2018 wt_tariff2019 vol_tariff2018 vol_tariff2019, by(isic3) 

* for tariff different using share of trade as weight
sum wt_tariff2018 wt_tariff2019

generate wt_tariff2017=0 
gen target=0
replace target=1 if wt_tariff2018!=0 | wt_tariff2019!=0

* for tariff difference using trade as weight
sum vol_tariff2018 vol_tariff2019

gen vol_tariff2017=0

* * for unweighted tariff difference
sum deta_tariff2018 deta_tariff2019

gen deta_tariff2017=0

* reshape the dataset
reshape long deta_tariff wt_tariff vol_tariff, i(isic3 target) j(year)

label variable deta_tariff "The unweighted tariff differences in each 3 digit industry"
label variable wt_tariff "The tariff difference in each 3 digit industry using weights of china import in each hs over total US import of industry"
label variable vol_tariff "The tariff difference in each 3 digit industry using weights of Chinese trade volume in each hs"

label variable target "Industry is targeted by trade war"

replace wt_tariff=0 if wt_tariff==.

replace deta_tariff=0 if deta_tariff==.
replace deta_tariff=deta_tariff/1000

replace vol_tariff=0 if vol_tariff==.
replace vol_tariff=vol_tariff/1000

sort year
by year: sum deta_tariff
by year: sum wt_tariff
by year: sum vol_tariff


* save the dataset
save "Data\tariff_difference.dta", replace


********************************************************************************
*** PART 2: Classifying Trade Sectors
********************************************************************************

// Trade sectors are the industries which Vietnam export to foreign countries - I will used Trade data of vietnam in 2015 to classifying //

* Open dataset
use "Data\Vietnamtoworld.dta", clear

* rename the variable
keep hs6 vietnam_export

* convert hs6 to isic 3 digit
merge m:1 hs6 using "Data\HS2012.dta", keep (match master) keepusing (isic3) generate (merge1)
drop if merge1==1
// for unspecific products //

* collapse to get total trade value by industry
collapse (sum) vietnam_export, by(isic3)

* generate trading variables
gen trading = 1
 
* save dataset
save "Data\tradingsector.dta", replace

********************************************************************************
*** PART 3: Cleaning the PPI datasets
********************************************************************************

* Open the PPI dataset for 2 digit inudstry code
import excel "Data\PPI_Vietnam.xlsx", sheet("V08.43") firstrow clear

* drop the year we don't need
rename A industry_name
keep industry_name isic2 ppi2014 ppi2015 ppi2016 ppi2017 ppi2018 ppi2019
replace ppi2014=ppi2017/ppi2014
replace ppi2015=ppi2017/ppi2015
replace ppi2016=ppi2017/ppi2016
replace ppi2018=ppi2017/ppi2018
replace ppi2019=ppi2017/ppi2019
replace ppi2017=1

* change from wide to long dataset
reshape long ppi, i(industry_name isic2) j(year)

* save dataset
save "Data\ppi.dta", replace

* Open the PPI dataset for letter industry code
import excel "Data\PPI_Vietnam.xlsx", sheet("V08.44") firstrow clear

* drop the year we don't need
rename A industry_name
keep industry_name isic ppi2014 ppi2015 ppi2016 ppi2017 ppi2018 ppi2019
replace ppi2014=ppi2017/ppi2014
replace ppi2015=ppi2017/ppi2015
replace ppi2016=ppi2017/ppi2016
replace ppi2018=ppi2017/ppi2018
replace ppi2019=ppi2017/ppi2019
replace ppi2017=1

* change from wide to long dataset
reshape long ppi, i(industry_name isic) j(year)
rename ppi ppi1

* save dataset
save "Data\ppi1.dta", replace

********************************************************************************
*** PART 4: VES Data Preparation
********************************************************************************

* Open VES dataset
use "Data\VES.dta", clear

* generate ID for companies
replace taxcode2=0 if taxcode2==.
tostring province, gen(province_str)
tostring taxcode, gen(taxcode_str)
tostring taxcode2, gen(taxcode2_str)
gen a = province_str + taxcode_str + taxcode2_str
destring a, replace
egen ID = group(a)
drop province_str taxcode_str taxcode2_str a

* drop observation with same ID and cannot distinguist
sort year ID
quietly by year ID: gen dup = cond(_N==1,0,_n)
tab dup
drop if dup>=1
count if ID==.
drop dup

* drop observations with industry code is not 5 digit codes
drop if main_industry<1000

gen a = int(main_industry/10)
* convert VSIC to ISIC 
// VES 2017 using VSIC 2007 while VES 2018 & 2019 using VSIC 2018 //
generate isic3=int(main_industry/100)
replace isic3=72 if isic3==73
replace isic3=139 if isic3==132 & year<=2017
replace isic3=492 if isic3==493
replace isic3=493 if isic3==494
replace isic3=639 if isic3==632 & year<=2017
replace isic3=799 if isic3==792 & year<=2017
replace isic3=851 if isic3==852 
replace isic3=852 if isic3==853 
replace isic3=851 if a==8521 & year>2017
replace isic3=852 if a==8522 & year>2017
replace isic3=852 if a==8532 & year>2017
replace isic3=853 if a==8533 & year>2017
replace isic3=853 if isic3==854
replace isic3=854 if isic3==855
replace isic3=855 if isic3==856
replace isic3=960 if isic3==961 | isic3==962| isic3==963
label variable isic3 "ISIC code 3 digit for industry"
drop a

* drop observations with wrong isic3 code (the code that does not belong to ISIC)
drop if (isic3>=35 &isic3<=49)| isic3==55|isic3==78|isic3==411|isic3==626|isic3==669|isic3==710

generate isic2 = int(isic3/10)
label variable isic2 "ISIC code 2 digit for industry"

generate isic1 = int(isic3/100)
label variable isic1 "ISIC code 1 digit for industry"

generate isic_name="."
replace isic_name ="Agriculture" if isic2<=3
replace isic_name ="Mining" if isic2>=5 & isic2<=9
replace isic_name ="Manufacturing" if isic2>=10 & isic2<=33
replace isic_name ="Electricity, gas, water supply" if isic2>=35 & isic2<=39
replace isic_name ="Construction" if isic2>=41 & isic2<=43
replace isic_name ="Wholesale - retail trade" if isic2>=45 & isic2<=47
replace isic_name ="Transportation" if isic2>=49 & isic2<=53
replace isic_name ="Accommodation service" if isic2>=55 & isic2<=56
replace isic_name ="Information, communication" if isic2>=58 & isic2<=63
replace isic_name ="Financial and insurance" if isic2>=64 & isic2<=66
replace isic_name ="Real estate" if isic2==68
replace isic_name ="Professional and technical" if isic2>=69 & isic2<=75
replace isic_name ="Administrative service" if isic2>=77 & isic2<=82
replace isic_name ="Public administration" if isic2==84
replace isic_name ="Education" if isic2==85
replace isic_name ="Human health and social work" if isic2>=86 & isic2<=88
replace isic_name ="Arts, entertainment" if isic2>=90 & isic2<=93
replace isic_name ="Others" if isic2>=94

generate isic ="."
replace isic ="A" if isic2<=3
replace isic ="B" if isic2>=5 & isic2<=9
replace isic ="C" if isic2>=10 & isic2<=33
replace isic ="D" if isic2==35
replace isic ="E" if isic2>=36 & isic2<=39
replace isic ="F" if isic2>=41 & isic2<=43
replace isic ="G" if isic2>=45 & isic2<=47
replace isic ="H" if isic2>=49 & isic2<=53
replace isic ="I" if isic2>=55 & isic2<=56
replace isic ="J" if isic2>=58 & isic2<=63
replace isic ="K" if isic2>=64 & isic2<=66
replace isic ="L" if isic2==68
replace isic ="M" if isic2>=69 & isic2<=75
replace isic ="N" if isic2>=77 & isic2<=82
replace isic ="O" if isic2==84
replace isic ="P" if isic2==85
replace isic ="Q" if isic2>=86 & isic2<=88
replace isic ="R" if isic2>=90 & isic2<=93
replace isic ="S" if isic2>=94 & isic2<=96
replace isic ="T" if isic2>=97 & isic2<=98
replace isic ="U" if isic2>=99


* change value export for non-export firms to 0
replace foreign_receive=0 if foreign_sell==0 | foreign_sell==.

* change value of fdi investment for non-fdi firms to 0
replace capital_foreign=0 if fdi==0

* Create exporter variables:
gen exporter=0
replace exporter=1 if foreign_sell==1
replace exporter=0 if foreign_sell==0
label variable exporter "Whether company has exporting activities or not"

* generate variable for size of companies by number of labor
generate size=0
replace size=1 if cur_employees<100 & cur_employees>=10
replace size=2 if cur_employees>=100 & cur_employees<200
replace size=3 if cur_employees>=200
label variable size "Size of company: super micro - small - medium - large"

* Generate Log of FDI (Foreign capital invested"
gen logFDI = log(capital_foreign+1)
label variable logFDI "Log of foreign capital invested so far"
gen logFDI2 = log(capital_foreign_invest+1)
label variable logFDI2 "Log of foreing capital invested this year"
sort year
by year: sum logFDI*

********************************************************************************
*** PART 5: Final Merge 
********************************************************************************

* Merge data with "tariff_difference" calculated at 3 digit industry code
merge m:1 year isic3 using "Data\tariff_difference.dta", keep (match master) generate (merge2)

replace deta_tariff=0 if deta_tariff==.
replace wt_tariff=0 if wt_tariff==.
replace vol_tariff=0 if vol_tariff==.

replace target=0 if target==.
drop merge2

* Generate the yearly deflator (CPI) to convert everything to 2017 dong
gen ycpi=1 if year==2017
replace ycpi= 153.632/ 159.07 if year==2018
replace ycpi=153.632/163.517 if year==2019
label variable ycpi "CPI: Multiply from base year to get to 2017"

* Generate the yearly deflator (PPI) 
* merge with industry has ppi at 2 digit code
merge m:1 year isic2 using "Data\ppi.dta", keep (match master) keepusing (ppi) generate (merge4)

* merge with industry has ppi at letter code
merge m:1 year isic using "Data\ppi1.dta", keep (match master) keepusing (ppi1) generate (merge5)
replace ppi=ppi1 if ppi==. & merge5==3
replace ppi=1 if ppi==.
drop ppi1
label variable ppi "Producer Price Index with base year 2017"

********************************************************************************
*** PART 6: Calculate Real Terms and Necessary Variables
********************************************************************************

* Log of real net revenue in 2017 prices
gen rsale = (revenue_net)* ppi
label variable rsale "Real total net revenue after deduction in 2017 prices"
generate logsale = log(rsale+1)
label variable logsale "Log of real net revenue in 2017 prices"
sort year
by year: sum logsale rsale

* Log of real total export in 2017 prices
sort year
by year: sum foreign_receive
gen rexport = (foreign_receive)* ppi
label variable rexport "Real total money received from foreign parners in 2017 prices"
gen logexport=log(rexport)
replace logexport=log(rexport+1) if rexport==0
label variable logexport "Logarithm of total money received from foreign partners"

* Log of number of employees
generate loglabor = log(cur_employees)
label variable loglabor "Log of number of employees"
sort year
by year: sum loglabor
by year: sum cur_employees

* Log of total payment to employees
replace labor_payment=. if labor_payment<0
gen rpayment = labor_payment*ycpi
label variable rpayment "Real total total payment to employees in 2017 prices using CPI"

generate log_pay = log(rpayment)
replace log_pay=log(rpayment+1) if rpayment==0
label variable log_pay "Log of total payment to employees adjusted CPI"
sort year
by year: sum log_pay
by year: sum labor_payment

* Calculate wage bill per worker
gen rwage_bill = rpayment/cur_employees
label variable rwage_bill "Real wage bill per worker in 2017 prices using CPI"

gen log_wage = log(rwage_bill)
replace log_wage=log(rwage_bill+1) if log_wage==0
label variable log_wage "Log of payment per worker adjusted CPI"
by year: sum log_wage
by year: sum rwage_bill

* Log of real total fixed assets in 2017 prices
gen rfixed = (cur_fixed)* ppi
label variable rfixed "Real total fixed assets in 2017 prices using PPI"

generate log_fixed = log(rfixed+1)
label variable log_fixed "Log of total fixed asset adjusted PPI"
sort year
by year: sum log_fixed rfixed

* Log of real total fixed assets per worker
gen fix_worker = rfixed/cur_employees
gen logfix_worker = log(fix_worker)
label var logfix_worker "Log of (real) total fixed assets per worker"

* Log of real investment during the year
gen rinvest=(invest_total)*ppi
label variable rinvest "Real total investment in 2017 prices"
sort year
by year: sum rinvest

gen loginvest = log(rinvest)
replace loginvest = log(rinvest + 1) if rinvest==0
label variable loginvest "Log of (real) total investment in 2017 prices"

* Log of real investment per worker 
gen inv_worker=rinvest/cur_employees
gen linv_worker=log(inv_worker+1)
label var linv_worker "Log of (real) total investment per worker"

* Manufacturing and Trading Sector Indicators 
generate manu =0
replace manu=1 if (isic3>=101 & isic3<=332)
label variable manu "Manufacturing Sectors"

* generate indicator for agriculture, forestry, and aquaculture
gen agri = (isic3<=32)

merge m:1 isic3 using "Data\tradingsector.dta", keep (match master) keepusing (trading) generate (merge3)
replace trading = 0 if trading ==.
label variable trading "Trading Sectors"
drop merge3

format rexport %12.0f 

*Log of productivity
gen productivity = rsale / cur_employees
replace productivity=log(productivity)
label variable productivity "Firm productivity defined by net revenue divided by employees (log)"

* Type of Export Firms (2)

*By Investment Type
gen DN=0
replace DN=1 if exporter==0 & fdi==0
gen DE=0
replace DE=1 if exporter==1 & fdi==0
gen FN=0
replace FN=1 if exporter==0 & fdi==1
gen FE=0
replace FE=1 if exporter==1 & fdi==1

label var DE "Domestic Exporters"
label var FN "FDI Non-exporters"
label var FE "FDI Exporters"


********************************************************************************
*** PART 7: Sample Selection and Define Exporter's Status
********************************************************************************

*(1) Basic Sample Selection
* drop observations with a missing value for industry
drop if isic3==. 

* keep only the firms have positive sale (net revenue) and fixed assest
keep if revenue_net>=0 
keep if cur_fixed>=0
keep if pre_fixed>=0
keep if rinvest>=0

* Identify and drop extreme observations: change in main variables (number of labor, fixed assets, foreign capital, total sales, export value, investment) bigger than 8 standard deviations from the mean

sort ID year
# delimit ;
for X in var cur_employees cur_fixed capital_foreign revenue_net foreign_receive invest:
egen m_X=mean(X)
\ egen s_X=sd(X)
\ gen z_X= (X-m_X)/s_X
\sum z_X
\drop if z_X>8 & z_X~=.
\drop if z_X<-8 & z_X~=.;

# delimit cr

save "Data\master.dta", replace
